# Reads MCMV data pre-organized by *presidential*  electoral period
# Merges it all, and then reads and merges in electoral data
# There is an equivalent code for local (municipal/mayoral) electoral periods

# elec.period
#-1 = 1999-2002 / elec in 2002 (included but not used)
# 0 = 2003-2006 / elec in 2006 (used in parallel trends assessment, only)
# 1 = 2007-2010 / elec in 2010
# 2 = 2011-2014 / elec in 2014
# 3 = 2015-2018 / elec in 2018

# Data files invoked:
# aux_mun_code5570.Rda: from CEM
# populacaomunicipio.csv: Official population data from basededados.org
# pib_percapita.csv: from IPEA (Felix Lopez)
# Base de Dados Deficit Habitacional Municipal 2010.xlsx: from Fundação Dom Cabra
# Composicao_RMs_RIDEs_AglomUrbanas_2010_07_31.xls: from IBGE
# out-electoraldata-pres.RData: processed electoral data publicly available from TSE
# out-mayorpartybyyear.RData: processed electoral data publicly available from TSE
# out-MCMV-munelecperiodpres-mdr.RData: processed data obtained by freedom of information request to MDR
# out-firsttreated-ep-pres.RData: processed data obtained by freedom of information request to MDR
# out-treatyears-pres.RData: processed data obtained by freedom of information request to MDR

library(plyr)
library(tidyverse)
library(readxl)
library(reshape)

# Need to figure out how to have this set by the .proj file, right? 
setwd("~/Dropbox/Replication-WhatYouSee-BJPS")

# (1) Start with all municipalities 
# Create a dataset by electoral period (stacked)
load("DataPrivate/aux_mun_code5570.Rda")
mun_ids <- subset(mun_ids,select=c("uf_sigla",
                                   "NOME.DO.MUNICÍPIO",
                                   "Cod_TSE_5",
                                   "Cod_IBGE"))
names(mun_ids)[1]<- "uf"
names(mun_ids)[2]<- "mun"
mun_ids$reg <- factor(substr(mun_ids$Cod_IBGE,1,1))
d <- rbind(data.frame(mun_ids,elec.period=-1),
           data.frame(mun_ids,elec.period=0),
           data.frame(mun_ids,elec.period=1),
           data.frame(mun_ids,elec.period=2),
           data.frame(mun_ids,elec.period=3))

# (2) Merge population and other municipal characteristics 
# (2a) Here we will have one baseline population value that is fixed (pop.2008)
# And another measured at the start of each period (populacao)
pop0 <- read_csv("DataPrivate/populacaomunicipio.csv") 
pop <- subset(pop0,ano==2008,select=c(id_municipio,populacao))
pop$Cod_IBGE <- as.character(pop$id_municipio)
names(pop)[2] <- "pop.2008"
pop$id_municipio <- NULL
d <- merge(d,pop,by="Cod_IBGE",all.x=T)
# Find municipalities with missing 2008 population (non eixstant)
#unique(d$Cod_IBGE[which(is.na(d$pop.2008))])
#unique(d$mun[which(is.na(d$pop.2008))]) 
#d$sub50.2008 <- d$pop.2008<50000
#d$band.2008 <- d$pop.2008<60000 & d$pop.2008>40000

#But we will also have population at the start of the electoral period
pop <- subset(pop0,is.element(ano,c(1999,2003,2007,2011,2015)),select=c(id_municipio,populacao,ano))
pop <- na.omit(pop)
pop$elec.period <- ifelse(pop$ano==1999,-1,
                        ifelse(pop$ano==2003,0,
                          ifelse(pop$ano==2007,1,
                                 ifelse(pop$ano==2011,2,3))))
pop$Cod_IBGE <- as.character(pop$id_municipio)
pop$ano <- pop$id_municipio <- NULL
names(pop)[1]<-"pop"
d <- merge(d,pop,by=c("Cod_IBGE","elec.period"),all.x=T)

# (2b) Read GDP per capita data (at the start of each period)
gdp <- read.csv2("DataPrivate/pib_percapita.csv")
gdp$elec.period <- ifelse(gdp$ano==2003,0,
                          ifelse(gdp$ano==2007,1,
                                 ifelse(gdp$ano==2011,2,
                                        ifelse(gdp$ano==2015,3,NA))))
gdp <- na.omit(subset(gdp,select=c(id_municipio,elec.period,pib_pc)))
d <- merge(d,gdp,by.x=c("Cod_IBGE","elec.period")
           ,by.y=c("id_municipio","elec.period"),all.x=T)
## Add gdp from before first treatment
gdp <- read.csv2("DataPrivate/pib_percapita.csv")
gdp.2008 <- subset(gdp,ano==2008,select=c("id_municipio","pib_pc"))
names(gdp.2008)[2] <- "pib_pc.2008"
d <- merge(d,gdp.2008,by.x=c("Cod_IBGE")
                      ,by.y=c("id_municipio"),all.x=T)

# (2c) housing deficit in 2010 (data from 2009)
hd <- read_excel("DataPrivate/Base de Dados Deficit Habitacional Municipal 2010.xlsx",
                 sheet=1,skip=6)
names(hd)[1] <- 'NOME_NIVEL'
hd <- subset(hd,NOME_NIVEL=='MUNICÍPIO',
             select=c(codigo3,DHTT01,DHTT02))
names(hd)[2:3]  <- c("hdtot.2010","hdtotrel.2010")
hd$Cod_IBGE <- gsub("\\D","",hd$codigo3)
hd$codigo3 <- NULL
d <- merge(d,hd,by=c("Cod_IBGE"),all.x=T)

# (2d) municipalities that were in Metropolitan regions as of 2010
mr <- read_excel("DataPrivate/Composicao_RMs_RIDEs_AglomUrbanas_2010_07_31.xls",
                 sheet=1,n_max=697)
names(mr)[3] <- "Cod_IBGE"
mr<- subset(mr,Tipo=="RM",select=c(Cod_IBGE,Tipo))
mr$Tipo <- mr$Tipo=="RM"
names(mr)[2] <- "mr.2010"
d <- merge(d,mr,by="Cod_IBGE",all.x=T)
d$mr.2010[which(is.na(d$mr.2010))]<-F
rm(mr)

# (2e) get period of first treatment
load("DataPrivate/out-firsttreated-ep-pres.RData")
d <- merge(d,ft, by="Cod_IBGE",all.x=T)
rm(ft)

# (2f) get years in which houses were built in each municipality and compute treatment-years in each cycle
# zero indicates treatment in the year of the election (excluding those after election)
# one is in the year before the election, etc...
load("DataPrivate/out-treatyears-pres.RData")
d <- merge(d,years.treated, by=c("Cod_IBGE","elec.period"),all.x=T)

# (3) Get the electoral data prepared with Data_ReadElectoral_Presidential.R
load("DataPrivate/out-electoraldata-pres.RData") 
cat(comment(e))
table(e$elec.period)

# Merge pt incumbent performance
d <- merge(d,e,
           by.x=c("Cod_IBGE","elec.period"),
           by.y=c("COD_MUN_IBGE","elec.period"),all.x=T)

# Create a variable with the pre-treatment (2006) incumbent vote share
tmp.2006 <- subset(d,elec.period==0,select=c(Cod_IBGE,vs))
names(tmp.2006)[2] <- "vs.2006"
d <- merge(d,tmp.2006,by=c("Cod_IBGE"),all.x=T)
rm(tmp.2006)

# Get mayor's party (as of the election two years before the presidential)
load("DataPrivate/out-mayorpartybyyear.RData")
mayors$elec.period <- ifelse(mayors$ANO_ELEICAO==2004,0,
                                ifelse(mayors$ANO_ELEICAO==2008,1,
                                       ifelse(mayors$ANO_ELEICAO==2012,2,3)))
mayors$ANO_ELEICAO<-NULL
names(mayors)[which(names(mayors)=="SIGLA_PARTIDO")]<-"mayor.party"
d <- merge(d,mayors,by=c("Cod_TSE_5","elec.period"),all.x=T)
d$mayor.pt <- d$mayor.party=="PT"


#Merge MCMV individual contracts 
load("DataPrivate/out-MCMV-munelecperiodpres-mdr.RData")#dnnmunep; MDR LAI request (FAR+FDS+PNHR and OP)
cat(comment(dnnmunep.pres))
dnnmunep <- dnnmunep.pres
d <- merge(d,dnnmunep[,-c(2,3)],by=c("Cod_IBGE","elec.period"),all.x=T)
# Turn NAs into 0s (municipalities without MCMV in the period)
d$Nop.mdr[which(is.na(d$Nop.mdr))]<-0
d$Npnhr.mdr[which(is.na(d$Npnhr.mdr))]<-0
d$Nfar.mdr[which(is.na(d$Nfar.mdr))]<-0
# Compute the aggregates of interest to us
d$N <- d$Nop.mdr+d$Npnhr.mdr+d$Nfar.mdr

# Check numbers
table(some.MCMV=d$N>0,years.treated=d$years.treated)

# Compute cummlative MCMV (over periods) by municipality
tmp <- subset(d,select=c(Cod_IBGE,elec.period,N))
tmpN <- ddply(tmp,.(Cod_IBGE),function(x){cumsum(x$N)}) 
tmpN <- melt(tmpN,id.vars="Cod_IBGE")
names(tmpN)<-c("Cod_IBGE","elec.period","cN")
tmpN$elec.period <- as.numeric(gsub("\\D","",tmpN$elec.period))-1
d <- merge(d,tmpN,by=c("Cod_IBGE","elec.period"),all.x=T)

#Drop unused variables
d$years.treated <- NULL
d$year.treated <- NULL
d$NOME_CANDIDATO <- NULL

comment(d) <- paste("Data_Assemble_ElectoralPeriod.R on",Sys.time(), 
                    "\nN = beneficiaries in period",
                    "\ncN = cummulative beneficiaires (over periods)\n.")
save(d, file="Data/dataset-mcmv-by-preselectoral-period.RData")
